/*==============================================================================
IC - EU - Home_own, hh_size 

Outline: 
I. 	Bring in home ownership from the 1981 Austrian and Canadian censuses and the 1991 Spanish census 
II. 	Bring in data from original sources
			 A) Belgium 
			 B) Canada
			 C) Denmark
			 D) Finland
			 E) Germany
			 F) Italy
			 G) Netherlands
			 H) Spain
			 I) Sweden
			 J) UK
			 
III. 	Append above home ownership data together

==============================================================================*/      

*===============================================================================
*I. Bring in 1981 Austria & Canadian Censuses; Bring in 1991 Spain Census 
*===============================================================================     

* Load the IPUMS International extract and reduce it to one record per
* household carrying tenure flags and an integer household weight. The result
* is stored in the tempfile precollapse for the Canada and Austria/Spain steps.
clear
cd "$scratch/IPUMS International Data"

! uncompress ipumsi_00013.dat.Z
quietly do ipumsi_00013.do

* Two-letter country code from the numeric cntry code
gen country = "AT" if cntry==40
	replace country = "CA" if cntry == 124
	replace country = "ES" if cntry == 724

* NUTS3 code (Austria and Spain only): country prefix + characters 2-5 of the
* stringified enuts3
tostring enuts3, replace force
gen nuts3 = country + substr(enuts3,2,4) if inlist(country,"AT","ES")

*Composite NUTS
replace nuts3="AT111&AT113" if nuts3=="AT111"

* nuts3 is spelled out in full: the abbreviation "nuts" only resolves while
* varabbrev is on and no other variable name begins with "nuts".
keep serial ownrshp wthh nuts3 country provca
duplicates drop //keep one observation per household

* Tenure flags (1 or missing) keyed on ownrshp codes 1, 2 and 3
gen home_own = 1 if ownrshp==1
gen home_no_own = 1 if ownrshp ==2
gen home_unknown_own = 1 if ownrshp ==3

* Frequency weights must be integers, so round the household weight
gen weight = round(wthh)

tempfile precollapse
save `precollapse.dta'

* Canada: weighted tenure counts by province plus a national total row,
* labelled with region codes and names, saved to IC_CA_home_own.dta.
keep if country == "CA"

collapse (sum) home_own home_no_own home_unknown_own [fw = weight], by(provca)

tempfile ca_province
save `ca_province.dta'

* National row: sum of the (already weighted) provincial counts
collapse (sum) home_own home_no_own home_unknown_own
generate region = 1

append using `ca_province.dta'

* provca 2..10 map to region 124001..124009; provca 1 maps to 124010
local provcode = 2
foreach regcode of numlist 124001/124009 {
	replace region = `regcode' if provca == `provcode'
	local ++provcode
}
replace region = 124010 if provca == 1

* Region names, listed in region-code order starting at 124001
generate nuts = ""
replace nuts = "Canada" if region == 1
local regcode = 124001
foreach name in "CA: Prince Edward Island" "CA: Nova Scotia" "CA: New Brunswick" ///
	"CA: Quebec" "CA: Ontario" "CA: Manitoba" "CA: Saskatchewan" "CA: Alberta" ///
	"CA: British Columbia" "CA: Newfoundland and Labrador" {
	replace nuts = "`name'" if region == `regcode'
	local ++regcode
}

save "$dta_files/IC_CA_home_own.dta", replace

* Austria and Spain: weighted tenure counts at NUTS3, then rolled up to NUTS2,
* NUTS1 and country by truncating the code to 4, 3 and 2 characters. All four
* levels are stacked into the tempfile at_es_hh_own.
use `precollapse.dta', clear

keep if inlist(country, "AT", "ES")

* NUTS3 level
rename nuts3 nuts
collapse (sum) home_own home_no_own home_unknown_own [fw = weight], by(nuts)

tempfile at_es_nuts3
save `at_es_nuts3.dta'

* NUTS2 level: first four characters of the code
replace nuts = substr(nuts, 1, 4)
collapse (sum) home_own home_no_own home_unknown_own, by(nuts)

tempfile at_es_nuts2
save `at_es_nuts2.dta'

* NUTS1 level: first three characters
replace nuts = substr(nuts, 1, 3)
collapse (sum) home_own home_no_own home_unknown_own, by(nuts)

tempfile at_es_nuts1
save `at_es_nuts1.dta'

* Country level: first two characters
replace nuts = substr(nuts, 1, 2)
collapse (sum) home_own home_no_own home_unknown_own, by(nuts)

* Stack country, NUTS1, NUTS2 and NUTS3 rows (in that order)
append using `at_es_nuts1.dta'
append using `at_es_nuts2.dta'
append using `at_es_nuts3.dta'

tempfile at_es_hh_own
save `at_es_hh_own.dta'

* Re-compress the raw extract (the working directory is still the IPUMS folder)
! compress ipumsi_00013.dat

*===============================================================================
*II. 	Bring in data from original sources
*===============================================================================

*****************************   A) Belgium    **********************************
* Belgium, from BE_HH_1970.xls:
*   - mean household size at NUTS2, NUTS1 and national level (sheet HH_Processed)
*   - owner / non-owner dwelling counts at NUTS3 and above (sheet Table5)
* The two are merged on nuts and stored in the tempfile be_hh_own_hh_size.
cd "$insheet_files/Belgium"

*--- Household size ------------------------------------------------------------
import excel "BE_HH_1970.xls", first sheet("HH_Processed") cellrange(A2:M15) clear

keep if nuts!=""

collapse (sum) persons*, by(nuts) //these are NUTS 2

* Size and tenure tempfiles get distinct names so the two halves of this
* section cannot be confused with each other.
tempfile be_size_nuts2
save `be_size_nuts2.dta'

gen nuts1 = substr(nuts,1,3)

collapse (sum) persons*, by(nuts1)

rename nuts1 nuts

tempfile be_size_nuts1
save `be_size_nuts1.dta'

collapse (sum) persons*

gen nuts = "BE"

append using `be_size_nuts1.dta'
append using `be_size_nuts2.dta'

replace persons_8 = persons_8 + persons_9 + persons_10 //since Brabant breakdown only goes up to 8+
drop persons_9 persons_10

* Mean household size = sum over n of n * (share of the persons_n count)
* rowtotal() is the documented name of the legacy rsum() egen function.
egen total = rowtotal(persons*)
foreach var of varlist persons* {
	gen `var'_share = `var' / total
}

forval n = 1/8 {
	gen mean_`n' = persons_`n'_share*`n'
}

egen hh_size = rowtotal(mean*)

keep nuts hh_size

tempfile be_hh_size
save `be_hh_size.dta'

*--- Home ownership ------------------------------------------------------------
import excel "BE_HH_1970.xls", sheet("Table5") cellrange(A8:H63) first clear

keep if nuts!=""

keep nuts home_own_1970 own_share_1970

rename home_own_1970 home_own

* Back out the dwelling total from the owner count and the owner share; the
* formula treats own_share_1970 as a percentage. The share variable is named
* in full rather than through the abbreviation own_share.
gen home_total = home_own / own_share_1970*100

gen home_no_own = home_total - home_own

keep nuts home_own home_no_own

tempfile be_own_nuts3
save `be_own_nuts3.dta'

gen nuts2 = substr(nuts,1,4)

collapse (sum) home_own home_no_own, by(nuts2)

rename nuts2 nuts

tempfile be_own_nuts2
save `be_own_nuts2.dta'

gen nuts1 = substr(nuts,1,3)

collapse (sum) home_own home_no_own, by(nuts1)

rename nuts1 nuts

tempfile be_own_nuts1
save `be_own_nuts1.dta'

collapse (sum) home_own home_no_own

gen nuts ="BE"

append using `be_own_nuts1.dta'
append using `be_own_nuts2.dta'
append using `be_own_nuts3.dta'

* Attach household size; NUTS3 rows have no match and keep hh_size missing
merge 1:1 nuts using `be_hh_size.dta', nogen

tempfile be_hh_own_hh_size
save `be_hh_own_hh_size.dta'

*****************************   B) Canada    **********************************
* Canada: household size by region code from CA_HH_1971.xls, labelled with
* region names and saved to IC_CA_hh_size.dta.
cd "$insheet_files/Canada"
import excel using "CA_HH_1971.xls", firstrow clear

drop if region == .
keep region hh_size

* Region names, listed in region-code order starting at 124001
generate nuts = ""
replace nuts = "Canada" if region == 1
local regcode = 124001
foreach name in "CA: Prince Edward Island" "CA: Nova Scotia" "CA: New Brunswick" ///
	"CA: Quebec" "CA: Ontario" "CA: Manitoba" "CA: Saskatchewan" "CA: Alberta" ///
	"CA: British Columbia" "CA: Newfoundland and Labrador" {
	replace nuts = "`name'" if region == `regcode'
	local ++regcode
}

save "$dta_files/IC_CA_hh_size.dta", replace

*****************************   C) Denmark    **********************************
* Denmark, from DK_HH_1970.xls (sheet HH_Processed): tenure counts and mean
* household size at every level from the source rows up to the national
* total, stored in the tempfile dk_hh_own_hh_size.
cd "$insheet_files/Denmark"

import excel "DK_HH_1970.xls", sheet("HH_Processed") firstrow clear
drop if nuts == ""

rename (owner_occupied rented) (home_own home_no_own)

* Lowest level present in the sheet
collapse (sum) home_own home_no_own persons*, by(nuts)
tempfile dk_nuts3
save `dk_nuts3.dta'

* Roll up by truncating the code to four, then three characters
replace nuts = substr(nuts, 1, 4)
collapse (sum) home_own home_no_own persons*, by(nuts)
tempfile dk_nuts2
save `dk_nuts2.dta'

replace nuts = substr(nuts, 1, 3)
collapse (sum) home_own home_no_own persons*, by(nuts)
tempfile dk_nuts1
save `dk_nuts1.dta'

* National total
collapse (sum) home_own home_no_own persons*
generate nuts = "DK"

append using `dk_nuts1.dta'
append using `dk_nuts2.dta'
append using `dk_nuts3.dta'

* Mean household size = sum over n = 1..8 of n * (share of persons_n)
egen total = rowtotal(persons*)
forvalues n = 1/8 {
	generate persons_`n'_share = persons_`n' / total
	generate mean_`n' = persons_`n'_share*`n'
}
egen hh_size = rowtotal(mean*)

keep nuts home* hh_size

tempfile dk_hh_own_hh_size
save `dk_hh_own_hh_size.dta'

*****************************   D) Finland    **********************************
* Finland, from FI_HH_1975.xls: tenure counts (sheet home_own) and mean
* household size (sheet hh_size) by nuts, merged and stored in the tempfile
* fi_hh_own_hh_size.
cd "$insheet_files/Finland"

*--- Tenure --------------------------------------------------------------------
import excel using "FI_HH_1975.xls", sheet("home_own") firstrow clear

collapse (sum) total_dwellings-occupants, by(nuts)

* Non-owned dwellings = all dwellings minus privately owned ones
generate home_no_own = total_dwellings - privately_owned
rename privately_owned home_own

keep nuts home_own home_no_own

tempfile fi_home_own
save `fi_home_own.dta'

*--- Household size ------------------------------------------------------------
import excel "FI_HH_1975.xls", sheet("hh_size") firstrow clear

keep nuts persons_1*
collapse (sum) persons*, by(nuts)

* Strip the "1_" infix so the columns read persons_1 ... persons_7
forvalues n = 1/7 {
	rename persons_1_`n' persons_`n'
}

* Mean household size = sum over n = 1..7 of n * (share of persons_n)
egen total = rowtotal(persons*)
forvalues n = 1/7 {
	generate persons_`n'_share = persons_`n' / total
	generate mean_`n' = persons_`n'_share*`n'
}
egen hh_size = rowtotal(mean*)

keep nuts hh_size

merge 1:1 nuts using `fi_home_own.dta', nogenerate

tempfile fi_hh_own_hh_size
save `fi_hh_own_hh_size.dta'

*****************************   E) Germany    **********************************
* West Germany, from DE_HH_1970.xls:
*   - owned / rented counts at NUTS3, rolled up to NUTS2, NUTS1 and a West
*     Germany total (sheet hh_own)
*   - mean household size at NUTS2, NUTS1 and West Germany total (sheet hh_size)
* The two are merged on nuts and stored in the tempfile de_hh_own_hh_size.
clear
cd "$insheet_files/Germany"

*--- Home ownership ------------------------------------------------------------
import excel using "DE_HH_1970.xls", cellrange(A3:C649) ///
	sheet("hh_own") first

* carryforward is a user-written command. Install it from SSC only when it is
* not already on the adopath, so reruns need no network access.
capture which carryforward
if _rc ssc install carryforward

* Fill the NUTS3 code down over the rows where it is blank
carryforward NUTS3, replace

rename NUTS3 nuts
replace Data = "Owned" if Data=="Sum - Owned"
replace Data = "Rented" if Data=="Sum - Rented"

* One row per nuts with columns COwned and CRented
reshape wide C, i(nuts) j(Data) string

rename COwned home_own
rename CRented home_no_own

tempfile de_own_nuts3
save `de_own_nuts3.dta'

gen nuts2 = substr(nuts,1,4)
* Pool DE71 and DE72 into one composite NUTS2 unit. The test is on the
* 4-character nuts2 code: nuts holds the NUTS3 code at this point, so comparing
* it against "DE71"/"DE72" cannot match a 5-character code.
* NOTE(review): confirm the composite label matches the nuts codes used in the
* hh_size sheet.
replace nuts2 ="DE71&DE72" if inlist(nuts2,"DE71","DE72")

collapse (sum) home*, by(nuts2)

rename nuts2 nuts

tempfile de_own_nuts2
save `de_own_nuts2.dta'

gen nuts1 = substr(nuts,1,3)

collapse (sum) home_own home_no_own, by(nuts1)

rename nuts1 nuts

tempfile de_own_nuts1
save `de_own_nuts1.dta'

collapse (sum) home_own home_no_own

gen nuts ="DEF&DE6&DE5&DE9&DEA&DEB&DEC&DE1&DE2&DE3&DE7" //West Germany

append using `de_own_nuts1.dta'
append using `de_own_nuts2.dta'
append using `de_own_nuts3.dta'

tempfile de_home_own
save `de_home_own.dta'

*--- Household size ------------------------------------------------------------
clear
import excel using "DE_HH_1970.xls", cellrange(A1:H30) sheet("hh_size") first

* Convert each persons_* column from a percentage of the household total into
* a count, so that it can be summed across regions
foreach var of varlist persons* {
	replace `var' = `var'/100*PPIVATHAUSHALTEINSGESAMT
}

keep nuts persons*

tempfile de_size_nuts2
save `de_size_nuts2.dta'

gen nuts1 = substr(nuts,1,3)

collapse (sum) persons*, by(nuts1)

rename nuts1 nuts

tempfile de_size_nuts1
save `de_size_nuts1.dta'

collapse (sum) persons*

gen nuts ="DEF&DE6&DE5&DE9&DEA&DEB&DEC&DE1&DE2&DE3&DE7" //West Germany

append using `de_size_nuts1.dta'
append using `de_size_nuts2.dta'

* Mean household size = sum over n = 1..6 of n * persons_n / total
* rowtotal() is the documented name of the legacy rsum() egen function.
egen total = rowtotal(persons*)
forval n = 1/6 {
	gen mean_`n' = persons_`n'/total*`n'
}
egen hh_size = rowtotal(mean*)

keep nuts hh_size

merge 1:1 nuts using `de_home_own.dta' , nogen

tempfile de_hh_own_hh_size
save `de_hh_own_hh_size.dta'

*****************************   F) Italy    *************************************
* Italy, from IT_HH_1971.xls: tenure counts (sheet home_own) and mean household
* size (sheet hh_size) by nuts, merged and stored in the tempfile
* it_hh_own_hh_size.
clear
cd "$insheet_files/Italy"

*--- Home ownership ------------------------------------------------------------
import excel "IT_HH_1971.xls", first cellrange(A2:J116) sheet("home_own") //excel contains all nuts levels

collapse (sum) H I , by(nuts)

rename H home_own
rename I home_no_own

* The tempfile is saved and merged below through the same macro spelling, so
* both statements refer to the same file.
tempfile it_home_own
save `it_home_own'

*--- Household size ------------------------------------------------------------
clear
import excel "IT_HH_1971.xls", first cellrange(A2:M116) sheet("hh_size") //excel contains all nuts levels

collapse (sum) persons* total, by(nuts)

* Mean household size = sum over n = 1..9 of n * (persons_n / total), using
* the total column supplied by the sheet
foreach var of varlist persons* {
	gen `var'_share = `var' / total
}

forval n = 1/9 {
	gen mean_`n' = persons_`n'_share*`n'
}
* rowtotal() is the documented name of the legacy rsum() egen function.
egen hh_size = rowtotal(mean*)

keep nuts hh_size

merge 1:1 nuts using `it_home_own', nogen

tempfile it_hh_own_hh_size
save `it_hh_own_hh_size.dta'

*****************************   G) Netherlands    **********************************
* Netherlands, from NL_HH_1970.xls: tenure counts (sheet hh_own) and mean
* household size (sheet hh_size) at the source level, NUTS1 and national
* level, merged on nuts and stored in the tempfile nl_hh_own_hh_size.
clear
cd "$insheet_files/Netherlands"

*--- Home ownership ------------------------------------------------------------
* The workbook is named with its .xls extension in both imports.
import excel using "NL_HH_1970.xls", first sheet("hh_own")

keep nuts home*

tempfile nl_own_nuts2
save `nl_own_nuts2.dta'

gen nuts1 = substr(nuts,1,3)

collapse (sum) home* , by(nuts1)

rename nuts1 nuts

tempfile nl_own_nuts1
save `nl_own_nuts1.dta'

collapse (sum) home*

gen nuts ="NL"

append using `nl_own_nuts1.dta'
append using `nl_own_nuts2.dta'

keep nuts home*

tempfile nl_home_own
save `nl_home_own.dta'

*--- Household size ------------------------------------------------------------
clear
import excel using "NL_HH_1970.xls", first sheet("hh_size")

tempfile nl_size_nuts2
save `nl_size_nuts2.dta'

gen nuts1 = substr(nuts,1,3)

collapse (sum) persons*, by(nuts1)

rename nuts1 nuts

tempfile nl_size_nuts1
save `nl_size_nuts1.dta'

collapse (sum) persons*

gen nuts = "NL"

append using `nl_size_nuts1.dta'
append using `nl_size_nuts2.dta'

* Mean household size = sum over n = 1..10 of n * persons_n / total
* rowtotal() is the documented name of the legacy rsum() egen function.
egen total = rowtotal(person*)
forval n = 1/10 {
	gen mean_`n' = persons_`n'/total*`n'
}
egen hh_size = rowtotal(mean*)

keep nuts hh_size

* nogen matches the other country sections; _merge was previously created and
* then discarded by the keep below.
merge 1:1 nuts using `nl_home_own.dta', nogen

keep nuts hh_size home*

tempfile nl_hh_own_hh_size
save `nl_hh_own_hh_size.dta'

*****************************   H) Spain    ************************************
* Spain, from ES_HH_1970.xls (sheet HH_Processed): mean household size at every
* level from the source rows up to the national total, stored in the tempfile
* es_hh_size.
cd "$insheet_files/Spain"

import excel "ES_HH_1970.xls", sheet("HH_Processed") firstrow clear

* Lowest level present in the sheet
collapse (sum) persons*, by(nuts)
tempfile es_nuts3
save `es_nuts3.dta'

* Roll up by truncating the code to four, then three characters
replace nuts = substr(nuts, 1, 4)
collapse (sum) persons*, by(nuts)
tempfile es_nuts2
save `es_nuts2.dta'

replace nuts = substr(nuts, 1, 3)
collapse (sum) persons*, by(nuts)
tempfile es_nuts1
save `es_nuts1.dta'

* National total
collapse (sum) persons*
generate nuts = "ES"

append using `es_nuts1.dta'
append using `es_nuts2.dta'
append using `es_nuts3.dta'

* Mean household size = sum over n = 1..10 of n * (share of persons_n)
egen total = rowtotal(persons*)
forvalues n = 1/10 {
	generate persons_`n'_share = persons_`n' / total
	generate mean_`n' = persons_`n'_share*`n'
}
egen hh_size = rowtotal(mean*)

keep nuts hh_size

tempfile es_hh_size
save `es_hh_size.dta'

*****************************   I) Sweden   ************************************
* Sweden, from SE_HH_1970.xls: tenure counts and mean household size at every
* level from the source rows up to the national total, stored in the tempfile
* se_hh_own_hh_size.
cd "$insheet_files/Sweden"

import excel "SE_HH_1970.xls", firstrow clear

* Lowest level present in the sheet
collapse (sum) home* persons* Total, by(nuts)
tempfile se_nuts3
save `se_nuts3.dta'

* Roll up by truncating the code to four, then three characters
replace nuts = substr(nuts, 1, 4)
collapse (sum) home* persons* Total, by(nuts)
tempfile se_nuts2
save `se_nuts2.dta'

replace nuts = substr(nuts, 1, 3)
collapse (sum) home* persons* Total, by(nuts)
tempfile se_nuts1
save `se_nuts1.dta'

* National total
collapse (sum) home* persons* Total
generate nuts = "SE"

append using `se_nuts1.dta'
append using `se_nuts2.dta'
append using `se_nuts3.dta'

* Mean household size = sum over n = 1..6 of n * (persons_n / Total), using
* the Total column supplied by the sheet
forvalues n = 1/6 {
	generate persons_`n'_share = persons_`n' / Total
	generate mean_`n' = persons_`n'_share*`n'
}
egen hh_size = rowtotal(mean*)

keep nuts home* hh_size

tempfile se_hh_own_hh_size
save `se_hh_own_hh_size.dta'

*****************************   J) UK       ************************************
* UK, from UK_HH_1966.xls: tenure counts and mean household size for the source
* rows plus a national total, stored in the tempfile uk_hh_own_hh_size.
cd "$insheet_files/UK"
import excel "UK_HH_1966.xls", firstrow clear

local uk_counts TotalHouseholds TotalPersons home_own home_no_own home_unknown_own
keep nuts `uk_counts'

tempfile uk_nuts1
save `uk_nuts1.dta'

* National total, stacked above the regional rows
collapse (sum) `uk_counts'
generate nuts = "UK"
append using `uk_nuts1.dta'

* Mean household size = persons per household
generate hh_size = TotalPersons/TotalHouseholds

keep nuts home* hh_size

tempfile uk_hh_own_hh_size
save `uk_hh_own_hh_size.dta'

********************************************************************************
*III. 	Append all home ownership data together
* Start from the Austria/Spain ownership rows and attach the Spanish household
* sizes by nuts; update fills values that are missing in memory from the
* using file.
use `at_es_hh_own.dta', clear
merge 1:1 nuts using `es_hh_size.dta', update nogenerate

* Stack the remaining countries in the same order as before
append using `be_hh_own_hh_size.dta' `fi_hh_own_hh_size.dta' ///
	`dk_hh_own_hh_size.dta' `de_hh_own_hh_size.dta' ///
	`it_hh_own_hh_size.dta' `nl_hh_own_hh_size.dta' ///
	`uk_hh_own_hh_size.dta' `se_hh_own_hh_size.dta'

keep nuts home_own home_no_own home_unknown_own hh_size
save "$dta_files/IC_EU_hh_own.dta", replace

